ggplot(winter_travel, aes(change_depth, user.count, color = Trailhead))+
geom_jitter(height = 0, width = 0.2, alpha = 0.5)+
facet_grid(modality~year)+
scale_y_log10()
ggplot(winter_travel, aes(change_depth, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(modality~Trailhead)
ggplot(winter_travel, aes(past3snow, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(modality~Trailhead)
ggplot(winter_travel, aes(past5snow, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(modality~Trailhead)
ggplot(all_users, aes(change_depth, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()
ggplot(all_users, aes(air_temp, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()
ggplot(all_users, aes(lag2snow, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()
ggplot(all_users, aes(lag3snow, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()
ggplot(all_users, aes(lag4snow, user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()
pairs(winter_travel[,c("change_depth","past2snow","past3snow","past4snow","past5snow")])
pairs(winter_travel[,c("change_depth","lag2snow","lag3snow","lag4snow")])
glm0 <- glm(user.count ~ 1, data=all_users)
summary(glm0)
##
## Call:
## glm(formula = user.count ~ 1, data = all_users)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -95.28 -54.28 -19.28 32.72 509.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 95.280 2.807 33.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 6129.417)
##
## Null deviance: 4762557 on 777 degrees of freedom
## Residual deviance: 4762557 on 777 degrees of freedom
## AIC: 8995.7
##
## Number of Fisher Scoring iterations: 2
glm1 <- glm(user.count ~ change_depth+lag2snow+lag3snow+lag4snow+air_temp, family = poisson, data=all_users)
summary(glm1)
##
## Call:
## glm(formula = user.count ~ change_depth + lag2snow + lag3snow +
## lag4snow + air_temp, family = poisson, data = all_users)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -13.808 -6.263 -2.039 3.132 35.855
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.4291357 0.0129598 341.760 <2e-16 ***
## change_depth -0.0208990 0.0019166 -10.904 <2e-16 ***
## lag2snow -0.0025174 0.0018126 -1.389 0.1649
## lag3snow -0.0031262 0.0017749 -1.761 0.0782 .
## lag4snow -0.0014054 0.0017575 -0.800 0.4239
## air_temp 0.0052044 0.0004812 10.816 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 43596 on 777 degrees of freedom
## Residual deviance: 43281 on 772 degrees of freedom
## AIC: 47963
##
## Number of Fisher Scoring iterations: 5
hist(resid(glm1))
confint(glm1)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 4.403701411 4.4545029514
## change_depth -0.024665112 -0.0171522714
## lag2snow -0.006078060 0.0010273450
## lag3snow -0.006612256 0.0003450671
## lag4snow -0.004858437 0.0020309387
## air_temp 0.004261695 0.0061478751
Don’t like this model. AIC went way up adding predictors. Overdispersion is definitely present. Negative binomial is probably a better choice for the conditional distribution.
glm0nb <- glm.nb(user.count ~ weekend, data=all_users)
summary(glm0nb)
##
## Call:
## glm.nb(formula = user.count ~ weekend, data = all_users, init.theta = 1.630430642,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7985 -0.8809 -0.1989 0.4486 2.7430
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.37478 0.03361 130.152 <2e-16 ***
## weekendweekend 0.52678 0.06246 8.434 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.6304) family taken to be 1)
##
## Null deviance: 944.56 on 777 degrees of freedom
## Residual deviance: 868.75 on 776 degrees of freedom
## AIC: 8523.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.6304
## Std. Err.: 0.0796
##
## 2 x log-likelihood: -8517.7890
glm1nb <- glm.nb(user.count ~ change_depth+lag3snow+air_temp+weekend, data=all_users)
summary(glm1nb)
##
## Call:
## glm.nb(formula = user.count ~ change_depth + lag3snow + air_temp +
## weekend, data = all_users, init.theta = 1.648230555, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7780 -0.8922 -0.2172 0.4375 2.8737
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.212314 0.097600 43.159 <2e-16 ***
## change_depth -0.027548 0.014053 -1.960 0.0500 *
## lag3snow 0.016459 0.013747 1.197 0.2312
## air_temp 0.006140 0.003631 1.691 0.0909 .
## weekendweekend 0.545700 0.062897 8.676 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.6482) family taken to be 1)
##
## Null deviance: 954.29 on 777 degrees of freedom
## Residual deviance: 868.34 on 773 degrees of freedom
## AIC: 8520.5
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.6482
## Std. Err.: 0.0806
##
## 2 x log-likelihood: -8508.5040
hist(resid(glm1nb))
confint(glm1nb)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 4.017397191 4.411521222
## change_depth -0.055587761 0.001578779
## lag3snow -0.010845054 0.044667768
## air_temp -0.001196478 0.013406582
## weekendweekend 0.423396724 0.669979550
Looking at AIC, adding predictors gives a slight improvement. Looks like about a 2.7% decrease in users per cm of new snow. Maybe about an 0.6% increase in users per degC of air temp. Recent snow doesn’t register a strong effect.
ggplot(na.omit(winter_travel), aes(rating_near, user.count, color = Trailhead, fill = Trailhead))+
geom_point(position=position_jitterdodge(jitter.width = 0.2))+
geom_boxplot(color="black", outlier.shape = NA)+
scale_y_log10()+
facet_wrap(~has_sled)
ggplot(na.omit(winter_travel), aes(rating_near, user.count))+
geom_jitter()+
geom_boxplot(outlier.shape = NA)+
scale_y_log10()+
facet_grid(has_sled~Trailhead)
We’ll model the trailheads independently since they are so different. Lots of things to say, but one note is that at Kebler, only high ratings lead to statistically significant drops in use, but at Slate, there’s a noticable drop at considerable and maybe even moderate.
glm2brushrd <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Brush Creek Rd"),])
summary(glm2brushrd)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Brush Creek Rd"), ],
## init.theta = 0.05012781, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4192 -0.4192 -0.3019 -0.3019 1.6795
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4307 0.5121 -2.794 0.00521 **
## rating_near2 -1.1686 0.6986 -1.673 0.09436 .
## rating_near3 -0.8718 0.8307 -1.049 0.29396
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.0501) family taken to be 1)
##
## Null deviance: 57.565 on 309 degrees of freedom
## Residual deviance: 54.482 on 307 degrees of freedom
## (78 observations deleted due to missingness)
## AIC: 197.92
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.0501
## Std. Err.: 0.0184
##
## 2 x log-likelihood: -189.9240
glm2brushth <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Brush Creek Trailhead"),])
summary(glm2brushth)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Brush Creek Trailhead"),
## ], init.theta = 0.0557544471, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4045 -0.2330 -0.2256 -0.2256 2.3487
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6818 0.7363 -2.284 0.0224 *
## rating_near2 -1.7522 0.9262 -1.892 0.0585 .
## rating_near3 -1.6716 0.9314 -1.795 0.0727 .
## rating_near4 -17.6208 2107.8559 -0.008 0.9933
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.0558) family taken to be 1)
##
## Null deviance: 47.436 on 360 degrees of freedom
## Residual deviance: 41.153 on 357 degrees of freedom
## (103 observations deleted due to missingness)
## AIC: 128.39
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.0558
## Std. Err.: 0.0325
##
## 2 x log-likelihood: -118.3850
glm2gothic <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Gothic Rd"),])
summary(glm2gothic)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Gothic Rd"), ], init.theta = 0.1723991875,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5975 -0.5904 -0.5659 -0.5659 3.0120
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.19942 0.24991 -4.799 1.59e-06 ***
## rating_near2 -0.13248 0.30370 -0.436 0.663
## rating_near3 0.03846 0.33447 0.115 0.908
## rating_near4 -18.10317 2107.85569 -0.009 0.993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1724) family taken to be 1)
##
## Null deviance: 288.93 on 669 degrees of freedom
## Residual deviance: 281.83 on 666 degrees of freedom
## (106 observations deleted due to missingness)
## AIC: 823.63
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1724
## Std. Err.: 0.0304
##
## 2 x log-likelihood: -813.6310
glm2cement <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Cement Creek"),])
summary(glm2cement)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Cement Creek"), ], init.theta = 0.5804415395,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4035 -1.2659 -0.5091 0.3274 2.0827
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.88446 0.16041 5.514 3.51e-08 ***
## rating_near2 0.06602 0.18682 0.353 0.7238
## rating_near3 -0.33761 0.19837 -1.702 0.0888 .
## rating_near4 -1.04698 0.41315 -2.534 0.0113 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.5804) family taken to be 1)
##
## Null deviance: 527.70 on 501 degrees of freedom
## Residual deviance: 514.57 on 498 degrees of freedom
## (132 observations deleted due to missingness)
## AIC: 1958.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.5804
## Std. Err.: 0.0586
##
## 2 x log-likelihood: -1948.8960
glm2kebler <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Kebler"),])
summary(glm2kebler)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Kebler"), ], init.theta = 0.497445695,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1331 -1.1530 -0.3222 0.3485 1.8286
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.8650 0.1519 25.440 < 2e-16 ***
## rating_near2 -0.1968 0.1781 -1.105 0.26907
## rating_near3 -0.2424 0.1912 -1.267 0.20503
## rating_near4 -1.2958 0.3729 -3.475 0.00051 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.4974) family taken to be 1)
##
## Null deviance: 601.85 on 492 degrees of freedom
## Residual deviance: 592.32 on 489 degrees of freedom
## (173 observations deleted due to missingness)
## AIC: 4462.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.4974
## Std. Err.: 0.0310
##
## 2 x log-likelihood: -4452.7740
glm2slate <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Slate River Rd"),])
summary(glm2slate)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Slate River Rd"), ],
## init.theta = 1.035728402, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0900 -0.9393 -0.2160 0.3116 4.0995
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.01432 0.09775 20.606 < 2e-16 ***
## rating_near2 -0.19287 0.11642 -1.657 0.09759 .
## rating_near3 -0.38399 0.12652 -3.035 0.00241 **
## rating_near4 -2.61216 0.38567 -6.773 1.26e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.0357) family taken to be 1)
##
## Null deviance: 742.48 on 596 degrees of freedom
## Residual deviance: 691.03 on 593 degrees of freedom
## (181 observations deleted due to missingness)
## AIC: 3372.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.0357
## Std. Err.: 0.0766
##
## 2 x log-likelihood: -3362.8170
glm2snodgrass <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Snodgrass"),])
summary(glm2snodgrass)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Snodgrass"), ], init.theta = 0.1674608948,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8989 -0.7272 -0.7195 -0.5130 1.7779
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5319 0.3736 1.424 0.15451
## rating_near2 -0.9710 0.4300 -2.258 0.02392 *
## rating_near3 -1.0131 0.4385 -2.310 0.02087 *
## rating_near4 -2.1413 0.8295 -2.581 0.00984 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1675) family taken to be 1)
##
## Null deviance: 218.98 on 376 degrees of freedom
## Residual deviance: 208.91 on 373 degrees of freedom
## (399 observations deleted due to missingness)
## AIC: 761.24
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.1675
## Std. Err.: 0.0266
##
## 2 x log-likelihood: -751.2350
glm2washington <- glm.nb(user.count ~ rating_near,
data=winter_travel[(winter_travel$has_sled==TRUE)&(winter_travel$Trailhead=="Washington Gulch"),])
summary(glm2washington)
##
## Call:
## glm.nb(formula = user.count ~ rating_near, data = winter_travel[(winter_travel$has_sled ==
## TRUE) & (winter_travel$Trailhead == "Washington Gulch"),
## ], init.theta = 0.4299627548, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.3892 -1.3396 -0.3314 0.2116 2.7192
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.0770 0.1380 7.804 6.02e-15 ***
## rating_near2 0.2111 0.1650 1.279 0.201
## rating_near3 0.0335 0.1805 0.186 0.853
## rating_near4 -2.4632 0.5791 -4.254 2.10e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.43) family taken to be 1)
##
## Null deviance: 700.48 on 673 degrees of freedom
## Residual deviance: 677.87 on 670 degrees of freedom
## (104 observations deleted due to missingness)
## AIC: 2960.4
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.4300
## Std. Err.: 0.0331
##
## 2 x log-likelihood: -2950.4220
ggplot(winter_travel, aes(air_temp, past5snow, size = user.count, color = rating_near))+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))
ggplot(winter_travel, aes(air_temp, change_depth, size = user.count, color = rating_near))+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))
ggplot(winter_travel, aes(air_temp, past5snow, size = user.count, color = rating_near))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
facet_grid(modality~Trailhead)
glm3 <- glm.nb(user.count~air_temp+change_depth+snow_depth, data=winter_travel)
summary(glm3)
##
## Call:
## glm.nb(formula = user.count ~ air_temp + change_depth + snow_depth,
## data = winter_travel, init.theta = 0.1909529704, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2676 -1.2052 -0.7061 -0.0697 4.4103
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.259017 0.098000 23.051 < 2e-16 ***
## air_temp 0.010527 0.003252 3.237 0.00121 **
## change_depth -0.022163 0.012109 -1.830 0.06722 .
## snow_depth -0.010435 0.001754 -5.949 2.7e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.191) family taken to be 1)
##
## Null deviance: 8018.5 on 8667 degrees of freedom
## Residual deviance: 7976.1 on 8664 degrees of freedom
## (1852 observations deleted due to missingness)
## AIC: 44328
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.19095
## Std. Err.: 0.00350
##
## 2 x log-likelihood: -44318.48900
This ended up being basically the same as question 1, since the past days’ snow didn’t show significance.
This depends on the question, and the effect size that we care about.
glm5 <- glm.nb(user.count~weekend, data=winter_travel)
summary(glm5)
##
## Call:
## glm.nb(formula = user.count ~ weekend, data = winter_travel,
## init.theta = 0.1926010033, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2637 -1.1843 -0.7577 -0.0739 3.7320
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.96757 0.02944 66.829 <2e-16 ***
## weekendweekend 0.51481 0.05443 9.458 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1926) family taken to be 1)
##
## Null deviance: 8073.7 on 8667 degrees of freedom
## Residual deviance: 7978.6 on 8666 degrees of freedom
## (1852 observations deleted due to missingness)
## AIC: 44272
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.19260
## Std. Err.: 0.00353
##
## 2 x log-likelihood: -44266.37600
glm5a <- glm.nb(user.count~week, data=winter_travel)
summary(glm5a)
##
## Call:
## glm.nb(formula = user.count ~ week, data = winter_travel, init.theta = 0.1926929823,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2729 -1.1858 -0.7428 -0.0550 3.6116
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.01370 0.06595 30.535 < 2e-16 ***
## weekMon -0.09208 0.09240 -0.997 0.319
## weekSat 0.52889 0.09218 5.738 9.60e-09 ***
## weekSun 0.40340 0.09264 4.354 1.33e-05 ***
## weekThu -0.05289 0.09360 -0.565 0.572
## weekTue -0.04771 0.09327 -0.511 0.609
## weekWed -0.03837 0.09336 -0.411 0.681
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1927) family taken to be 1)
##
## Null deviance: 8076.7 on 8667 degrees of freedom
## Residual deviance: 7978.7 on 8661 degrees of freedom
## (1852 observations deleted due to missingness)
## AIC: 44279
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.19269
## Std. Err.: 0.00353
##
## 2 x log-likelihood: -44263.48100
Saturdays are about 13% busier than Sundays, weekdays are about half as busy as weekends.
ggplot(winter_travel, aes(year, user.count))+
geom_jitter(height = 0, width = 0.2, alpha = 0.7)+
facet_grid(modality~Trailhead)+
scale_y_log10()
day_totals <- all_users %>%
group_by(date, week, weekend) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` regrouping output by 'date', 'week' (override with `.groups` argument)
head(day_totals, 10)
## # A tibble: 10 x 4
## # Groups: date, week [10]
## date week weekend users
## <fct> <fct> <fct> <int>
## 1 2018-02-17 Sat weekend 819
## 2 2018-02-18 Sun weekend 744
## 3 2018-03-03 Sat weekend 665
## 4 2018-01-27 Sat weekend 659
## 5 2018-12-29 Sat weekend 623
## 6 2018-03-26 Mon weekday 586
## 7 2018-12-30 Sun weekend 568
## 8 2019-02-23 Sat weekend 568
## 9 2018-03-11 Sun weekend 544
## 10 2018-03-21 Wed weekday 517
ggplot(day_totals, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,120),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
labs(x="")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Jump in usage in the last week of the calendar year, otherwise gradual increase until sometime mid March.
day_totals_modes <- all_locations %>%
group_by(date, week, weekend, modality, has_sled) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` regrouping output by 'date', 'week', 'weekend', 'modality' (override with `.groups` argument)
head(day_totals_modes, 10)
## # A tibble: 10 x 6
## # Groups: date, week, weekend, modality [10]
## date week weekend modality has_sled users
## <fct> <fct> <fct> <fct> <lgl> <int>
## 1 2018-01-27 Sat weekend Non.motorized FALSE 601
## 2 2018-02-17 Sat weekend Non.motorized FALSE 557
## 3 2018-02-18 Sun weekend Non.motorized FALSE 514
## 4 2018-03-03 Sat weekend Non.motorized FALSE 468
## 5 2019-02-23 Sat weekend Non.motorized FALSE 445
## 6 2018-03-21 Wed weekday Non.motorized FALSE 387
## 7 2017-12-30 Sat weekend Non.motorized FALSE 367
## 8 2018-03-26 Mon weekday Non.motorized FALSE 356
## 9 2020-03-15 Sun weekend Non.motorized FALSE 342
## 10 2018-12-29 Sat weekend Non.motorized FALSE 331
ggplot(day_totals_modes, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,120),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(modality~1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
day_totals_modes2 <- all_locations %>%
group_by(date, week, weekend, has_sled) %>%
summarise(users = sum(user.count, na.rm = TRUE)) %>%
arrange(desc(users))
## `summarise()` regrouping output by 'date', 'week', 'weekend' (override with `.groups` argument)
ggplot(day_totals_modes2, aes((yday(date)+30)%%365, users))+
geom_smooth()+
geom_point(aes(color=weekend))+
scale_x_continuous(breaks = c(0,31,62,90,120),
labels = c("Dec 1", "Jan 1", "Feb 1", "Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(has_sled~1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Variance is greater in the motorized groups, and the end of year and mid March peaks are more pronounced.
gam6 <- gam(user.count ~ modality+s((yday(date)+30)%%365), family = nb(), data=all_locations)
summary(gam6)
##
## Family: Negative Binomial(1.089)
## Link function: log
##
## Formula:
## user.count ~ modality + s((yday(date) + 30)%%365)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.94062 0.05236 37.063 < 2e-16 ***
## modalityMechanized -0.60637 0.07610 -7.968 1.62e-15 ***
## modalityMotorized 2.11499 0.07174 29.479 < 2e-16 ***
## modalityNon.motorized 2.78623 0.07159 38.921 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s((yday(date) + 30)%%365) 8.014 8.74 124.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.529 Deviance explained = 59.5%
## -REML = 6353.6 Scale est. = 1 n = 1556
march_on <- all_users[month(all_users$date)%in%3:4,]
ggplot(march_on, aes((yday(march_on$date)+30%%365), user.count, color=year))+
geom_point()+geom_smooth()+
scale_x_continuous(breaks = c(90,122),
labels = c("Mar 1", "Apr 1"))+
scale_y_log10()+
labs(x="")+
facet_grid(has_sled~1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
I don’t see anything.
ggplot(winter_travel, aes(year, user.count))+
geom_boxplot()+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, aes( color = rating_near))+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(modality~Trailhead)
ggplot(all_modes, aes(year, user.count))+
geom_boxplot()+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, aes( color = rating_near))+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(~Trailhead)
ggplot(all_locations, aes(year, user.count))+
geom_boxplot()+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, aes( color = rating_near))+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(~modality)
ggplot(all_users, aes(year, user.count))+
geom_boxplot()+
geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, aes( color = rating_near))+
scale_color_manual(values = c("green", "yellow", "orange", "red"))+
scale_y_log10()+
facet_grid(~has_sled)
glm10 <- glm.nb(user.count~year*(Trailhead+modality+rating_near), data = winter_travel)
summary(glm10)
##
## Call:
## glm.nb(formula = user.count ~ year * (Trailhead + modality +
## rating_near), data = winter_travel, init.theta = 0.4542202324,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3537 -1.0218 -0.5477 0.1078 7.7451
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.52396 0.16782 -3.122 0.001795
## yearw1819 -1.42704 0.17305 -8.246 < 2e-16
## yearw1920 -2.88716 0.29448 -9.804 < 2e-16
## TrailheadBrush Creek Trailhead 0.03192 0.18166 0.176 0.860534
## TrailheadCement Creek 0.70773 0.19115 3.702 0.000213
## TrailheadGothic Rd 1.27520 0.16837 7.574 3.63e-14
## TrailheadKebler 3.05372 0.17284 17.668 < 2e-16
## TrailheadSlate River Rd 1.84414 0.16159 11.413 < 2e-16
## TrailheadSnodgrass 1.48564 0.17050 8.713 < 2e-16
## TrailheadWashington Gulch 1.52497 0.16204 9.411 < 2e-16
## modalityMechanized -0.76322 0.10722 -7.118 1.09e-12
## modalityMotorized 0.78827 0.10091 7.811 5.66e-15
## modalityNon.motorized 2.77719 0.09929 27.970 < 2e-16
## rating_near2 -0.13479 0.07866 -1.713 0.086622
## rating_near3 -0.41104 0.09376 -4.384 1.17e-05
## rating_near4 -1.35995 0.15332 -8.870 < 2e-16
## yearw1819:TrailheadBrush Creek Trailhead -0.07789 0.18192 -0.428 0.668544
## yearw1920:TrailheadBrush Creek Trailhead NA NA NA NA
## yearw1819:TrailheadCement Creek 0.78027 0.18162 4.296 1.74e-05
## yearw1920:TrailheadCement Creek 0.83855 0.23308 3.598 0.000321
## yearw1819:TrailheadGothic Rd 0.28666 0.15776 1.817 0.069211
## yearw1920:TrailheadGothic Rd -0.02951 0.21393 -0.138 0.890273
## yearw1819:TrailheadKebler 0.32414 0.16003 2.025 0.042819
## yearw1920:TrailheadKebler -0.30533 0.21551 -1.417 0.156544
## yearw1819:TrailheadSlate River Rd 0.60280 0.14772 4.081 4.49e-05
## yearw1920:TrailheadSlate River Rd -0.65899 0.21222 -3.105 0.001902
## yearw1819:TrailheadSnodgrass 0.23169 0.15936 1.454 0.145973
## yearw1920:TrailheadSnodgrass 0.95354 0.24867 3.835 0.000126
## yearw1819:TrailheadWashington Gulch NA NA NA NA
## yearw1920:TrailheadWashington Gulch -0.39023 0.21378 -1.825 0.067951
## yearw1819:modalityMechanized 0.48735 0.14123 3.451 0.000559
## yearw1920:modalityMechanized 2.36967 0.25462 9.307 < 2e-16
## yearw1819:modalityMotorized 0.77078 0.13061 5.901 3.60e-09
## yearw1920:modalityMotorized 3.12716 0.24711 12.655 < 2e-16
## yearw1819:modalityNon.motorized 0.54190 0.12822 4.226 2.38e-05
## yearw1920:modalityNon.motorized 2.34700 0.24567 9.553 < 2e-16
## yearw1819:rating_near2 0.17102 0.12973 1.318 0.187386
## yearw1920:rating_near2 -0.08820 0.11383 -0.775 0.438449
## yearw1819:rating_near3 0.38573 0.13926 2.770 0.005608
## yearw1920:rating_near3 0.21801 0.14385 1.515 0.129648
## yearw1819:rating_near4 NA NA NA NA
## yearw1920:rating_near4 NA NA NA NA
##
## (Intercept) **
## yearw1819 ***
## yearw1920 ***
## TrailheadBrush Creek Trailhead
## TrailheadCement Creek ***
## TrailheadGothic Rd ***
## TrailheadKebler ***
## TrailheadSlate River Rd ***
## TrailheadSnodgrass ***
## TrailheadWashington Gulch ***
## modalityMechanized ***
## modalityMotorized ***
## modalityNon.motorized ***
## rating_near2 .
## rating_near3 ***
## rating_near4 ***
## yearw1819:TrailheadBrush Creek Trailhead
## yearw1920:TrailheadBrush Creek Trailhead
## yearw1819:TrailheadCement Creek ***
## yearw1920:TrailheadCement Creek ***
## yearw1819:TrailheadGothic Rd .
## yearw1920:TrailheadGothic Rd
## yearw1819:TrailheadKebler *
## yearw1920:TrailheadKebler
## yearw1819:TrailheadSlate River Rd ***
## yearw1920:TrailheadSlate River Rd **
## yearw1819:TrailheadSnodgrass
## yearw1920:TrailheadSnodgrass ***
## yearw1819:TrailheadWashington Gulch
## yearw1920:TrailheadWashington Gulch .
## yearw1819:modalityMechanized ***
## yearw1920:modalityMechanized ***
## yearw1819:modalityMotorized ***
## yearw1920:modalityMotorized ***
## yearw1819:modalityNon.motorized ***
## yearw1920:modalityNon.motorized ***
## yearw1819:rating_near2
## yearw1920:rating_near2
## yearw1819:rating_near3 **
## yearw1920:rating_near3
## yearw1819:rating_near4
## yearw1920:rating_near4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.4542) family taken to be 1)
##
## Null deviance: 15614.6 on 8597 degrees of freedom
## Residual deviance: 7979.2 on 8560 degrees of freedom
## (1922 observations deleted due to missingness)
## AIC: 39191
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.45422
## Std. Err.: 0.00987
##
## 2 x log-likelihood: -39112.58100
high_risk <- as.numeric(winter_travel$rating_above)+
as.numeric(winter_travel$rating_near)+
as.numeric(winter_travel$rating_below) > 9
summary(glm.nb(user.count~high_risk, data=winter_travel))
##
## Call:
## glm.nb(formula = user.count ~ high_risk, data = winter_travel,
## init.theta = 0.1914928382, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2142 -1.2142 -0.6832 -0.0954 4.3940
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.17484 0.02532 85.890 <2e-16 ***
## high_riskTRUE -1.22998 0.14481 -8.494 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1915) family taken to be 1)
##
## Null deviance: 7972.2 on 8597 degrees of freedom
## Residual deviance: 7919.7 on 8596 degrees of freedom
## (1922 observations deleted due to missingness)
## AIC: 44043
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.19149
## Std. Err.: 0.00352
##
## 2 x log-likelihood: -44037.24100
On high risk days the usage is about 30% of the average on days that are not high risk.